* This code reproduces all the analyses reported in Kloehn, Liu, and Spamann. Written for Stata 14 MP
* required packages: estout
* NB: the information for tables 1 and 2 is displayed in the log but not saved as a table file (assembling all of it into one table is not straightforward within Stata); those tables need to be put together by hand


* --- Session setup ---
set more off         // never pause for --more--
capture log close    // close a log left open by an earlier run; no error if none is open
log using Kloehn_Liu_Spamann_reproduction, replace

* Temporary datasets that hand data from one step to the next.
* If you want to keep the cleaned data for further analysis, change the code at end of part I so that maindata* are saved as normal datasets.
tempfile bin first endtime
tempfile maindata maindata_incl_withdrawn

/************************************************************
	I. Data Clean & Assembly
************************************************************/

* decisions & reasons
* Input is the Excel version of participations_2016-04-18T12-59-10+00-00.csv created by the experiment software.
* Stata does not properly handle commas in text when ingesting the csv directly.
import excel Judges_participations.xlsx, firstrow clear
rename exitquestion#* exitq#
rename (randomID canceledexplicitorauto judgmentg primings anchor exitq1 exitq2 exitq3 exitq4 exitq15 exitq17 nationality precedent datausageallowed0withdrawn) ///
	(randomID canceled guilty prime anchor confidence sentence knowICL recognize chn_courtlevel chn_court_division nat_ prec_ withdrawal)

* The source column is datausageallowed0withdrawn; after this recode withdrawal is 1 where the source was 0, and 0 where it was 1 or missing.
recode withdrawal (0=1) (1=0) (.=0)

* Convert the click timestamps (timestampstep1, timestampstep2, ...) into %tc doubles time1, time2, ...; the loop index follows the order of the step names.
local t 0
foreach step in start consent "instructions read" "documents finished" "judgment rendered" exit {
	local ++t
	gen double time`t' = clock(timestampstep`t', "YMD#hms#")
	format %tc time`t'
	label variable time`t' "time when clicked: `step'"
}
* Build the endtime tempfile used for the document views below: one row per participation holding time4 (the "documents finished" click) under the pseudo-document name "end".
* It is needed to calculate how much time was spent with the LAST document.
preserve
	keep randomID time4
	gen document = "end"
	rename time4 time
	save `endtime'
restore
* Turn the string treatment indicators into labelled numeric factors with a fixed coding.
label define nationality 1 "croatian" 2 "serbian"
encode nat_, gen(nationality) label(nationality)
label define precedent 1 "sainovic" 2 "vasiljevic" 3 "besic"
encode prec_, gen(precedent) label(precedent)

* Keep only the first run of digits in the free-text sentence answer, then make it numeric (anything non-numeric becomes missing).
replace sentence = regexs(1) if regexm(sentence,"([0-9]+)")
destring sentence, replace force

* No judgment without a time5 click ("judgment rendered").
replace guilty = . if missing(time5)

* Durations in minutes: time2 to time5 (consent to judgment) and time3 to time4 (instructions read to documents finished).
generate duration = minutes(time5 - time2)
generate timedoc = minutes(time4 - time3)

* Sample restrictions; a participation is dropped if ANY of the conditions holds.
drop if	  canceled ///
		| (randomID ==  "hhlairgpsgq8n9h9eus7nlefp5") /// this participation worked with English docs -- probably a preview participation, in any event unusable
		| !inlist(dofc(time1),date("2016-04-12", "YMD"),date("2016-04-18", "YMD")) /// April 12 and 18 are the dates Zhuang had judges in the seminar room -- in class on the 12th, after class on the 18th
		| duration < 10 /// total time under 10 minutes: RAs testing the system
		| timedoc < 3 /// same
		| missing(guilty)  // incomplete participations

* Remove helper and unused columns, then park the cleaned participations in the bin tempfile.
drop nat_ prec_
drop canceled timedoc shortID timestampstep* reasonf Amazon auto exitq5-exitq14
save `bin'

* click & display data
* The csv is a list of clicks (indexed by participant and time stamp).
import delimited document_requests_2016-04-18T12-59-12+00-00.csv, varnames(1) clear
rename participationid randomID

* English-language documents could have been requested only in trial runs.
drop if strpos(document, "/en_US") > 0

* Click time as a %tc double.
generate double time = clock(timestamp, "YMD#hms#")

* Reduce the requested path to a short document name.
replace document = subinstr(document, "/zh_CN", "", 1)
replace document = subinstr(document, "/documents/", "", 1)
replace document = "toc" if document == "/documents"
* There are versions /2 and /3 in the data as well because of splitting of the long docs; fold them into one name each.
replace document = "precedent" if strpos(document, "precedent") > 0
replace document = "trialjudgment" if strpos(document, "trial_judgment") > 0

* Which came first -- precedent or statute? Stored in the first tempfile as one row per judge.
preserve
	keep if document == "statute" | document == "precedent"
	sort randomID time
	by randomID: keep if _n == 1 // earliest of the two kinds of click for each judge
	gen first = document
	keep randomID first
	save `first'
restore

* Time spent per document, then merge with the participation data.
* The "end" rows from the endtime tempfile close the last document spell of every participant.
append using `endtime'
* By participant: for every click, calculate time until next click, i.e., how much time participant spent with that document.
* Stored as double: time is a %tc double in milliseconds, and a float holds integers exactly only up to 16,777,216 (about 4.7 hours in ms), so a float difference could be silently rounded.
bysort randomID (time): gen double t_ = time[_n+1] - time
assert t_==. if document=="end" // should be because nothing comes after "end" so time[_n+1] should be empty
drop if document=="end"
collapse (sum) t_, by(randomID document) // adding up multiple spells with the same document by the same participant, if any
replace t_ = minutes(t_) // changing unit to minutes
reshape wide t_, i(randomID) j(document) string // transform to one observation per participant, with one t_ variable per document (from one observation per document per participant)
recode t_* (.=0) // we confirmed in the original csv's that many participants actually did not look at certain documents
egen t_total = rowtotal(t_*)
* assert(1 3): every cleaned participation must have click data; keep(3) discards click data of participations that were dropped during cleaning
merge 1:1 randomID using `bin', assert(1 3) keep(3) nogenerate
merge 1:1 randomID using `first', keep(1 3) nogenerate // unmatched observation in master: 1 participant without look at statute or precedent --> missing first

* save
* Two versions of the analysis dataset: first with everybody, then without the participants flagged in withdrawal.
order randomID nationality precedent prime anchor time* duration
save `maindata_incl_withdrawn'
drop if withdrawal != 0
save `maindata'


/************************************************************
	II. Analysis
************************************************************/

* Run the whole analysis twice, once per dataset saved at the end of part I; wd is the suffix that selects the dataset and tags the output.
foreach wd in "" "_incl_withdrawn" { // empty is results reported in paper; incl_withdrawn shows that results would be unaffected if we included participants who withdrew

* Load the cleaned data for this pass (maindata or maindata_incl_withdrawn).
use `maindata`wd'', clear
* keep(1 3) retains every participant in memory, matched or not, and discards rows that exist only in the coding file.
qui merge 1:1 randomID using Zhuang_judgmentreasons_attributes.dta, keep(1 3) // this is author Zhuang Liu's coding of precedent mentioned/followed and unresponsive

******************
di _newline "******* TABLE 1 and associated tests `wd' ********" 

* 1) The Table
* Mean, sum and count of guilty by nationality x precedent, with row and column totals.
table nationality precedent, c(mean guilty sum guilty n guilty) col row  // NB: order of nationality columns is opposite of paper's
* 2) Fisher's exact test, nationality
tab guilty nationality, exact // Vuković v. Horvat
* 3) Fisher's exact test, precedent
* Everything below runs quietly; only the lines prefixed with noisily reach the log.
qui{
* One test per combination of sample restriction and excluded precedent.
* NB: a missing value satisfies != 1, so observations with missing prec_mention or unresponsive stay in the restricted samples.
foreach restriction in "" "& prec_mention != 1" "& unresponsive != 1" {
	forvalues excludedprecedent=0/3 { // 0 is none
		tab guilty precedent if precedent !=`excludedprecedent' `restriction', exact
		noisily di "Precedent Fisher test: restriction `restriction', excludedprecedent `excludedprecedent', p=" %4.3f r(p_exact)
		}
	}
* 4) Covariate Balance 
* noisily is required here: without it this heading is swallowed by the surrounding quietly block and never appears in the log.
noisily di _newline "covariate balance"
* Balance across precedents, leaving out one precedent at a time (0 leaves out none).
forvalues i=0/3 {
foreach covariate of varlist exitq16 knowICL recognize chn_court* {
	tab `covariate' precedent if precedent !=`i', chi2 // chi2 overrejects, but that's good for these tests -- biasing us against ourselves
	noisily di "Covariate balance, precedent: excluded precedent `i', covariate `covariate', p=" %4.3f r(p)
}
}

* Balance across nationalities.
foreach covariate of varlist exitq16 knowICL recognize chn_court* {
	tab `covariate' nationality, chi2
	noisily di "Covariate balance, nationality: covariate `covariate', p=" %4.3f r(p)
}
}

*****************
display _newline "******* TABLE 2 and associated tests `wd' *************"
* 1) The Table
* Minutes per document (t_ variables) and the matching shares of total time (f_ variables).
local tvars t_briefs t_statement t_statute t_trial t_precedent t_total
local fvars f_briefs f_statement f_statute f_trial f_precedent
generate t_briefs = t_brief + t_brief_response
tabstat `tvars', format(%9.2f) columns(statistics)
* Fraction of time: each f_ variable is the t_ variable of the same name divided by t_total.
foreach timevar of local tvars {
	local stem = substr("`timevar'", 3, .) // name without the leading t_
	generate f_`stem' = `timevar' / t_total
}
tabstat `fvars', format(%9.2f) columns(statistics)

* 2) statistical tests
signrank t_statute = t_precedent // Wilcoxon matched-pairs signed-ranks test
ttest t_statute == t_precedent // t-test of time in the footnote

* 3) associated summary statistics
display _newline "* order of looking at statute vs. precedent"
tabulate first // this shows how many judges looked at precedent first, and how many at statute
display _newline "* mention precedent?"
tabulate prec_mention

****************************
*** 3. Table 3 Regression***
quietly {
	*** 1) Cope with the missing data by coding them as factor level 0 (= unreported baseline)
	* Each string covariate in rawvars is encoded into the numeric factor at the same position in factors.
	local rawvars exitq16 knowICL recognize chn_courtlevel chn_court_division
	local factors age prior_know recognition court_level division
	forvalues k = 1/5 {
		local source : word `k' of `rawvars'
		local target : word `k' of `factors'
		encode `source', gen(`target')
	}
	recode `factors' (.=0)

	*** 2) regression
	* Every stored model carries p_V_v_B, the p-value of the Wald test of Vasiljevic vs. Besic.
	local controls i.age i.prior_know i.recognition i.court_level i.division
	estimates clear

	* baseline regression
	_eststo: reg guilty i.precedent, robust
	test 2.precedent = 3.precedent
	estadd scalar p_V_v_B = r(p)

	* nationality alone (empty control), then adding controls one by one
	foreach control in "" `controls' {
		_eststo: reg guilty i.precedent nationality `control', robust
		test 2.precedent = 3.precedent
		estadd scalar p_V_v_B = r(p)
	}

	* full regression
	_eststo: reg guilty i.precedent nationality `controls', robust
	test 2.precedent = 3.precedent
	estadd scalar p_V_v_B = r(p)
}
* Write all stored models to Table3.csv or Table3_incl_withdrawn.csv, depending on the current sample.
esttab using Table3`wd'.csv, stat(r2 N p_V_v_B) b(%4.2f) se star(* 0.1 ** 0.05 *** 0.01) nobaselevels label replace
* end of the foreach loop over wd opened at the start of part II
}

* close the log opened with log using at the top of this file
log close